Loading all data

# study area
# studyarea <- st_read("data/StudyArea.shp") %>% 
studyarea <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/studyarea/StudyArea.shp") %>%
  st_transform('EPSG:2272')
## Reading layer `StudyArea' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/studyarea/StudyArea.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -8367662 ymin: 4859107 xmax: -8365440 ymax: 4860628
## Projected CRS: WGS 84 / Pseudo-Mercator
# Philly block groups
# blockgroups <- st_read("data/Philly_blockgroup.shp") %>%
blockgroups <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Philly_blockgroup/Philly_blockgroup.shp") %>%
  st_transform('EPSG:2272')
## Reading layer `Philly_blockgroup' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Philly_blockgroup/Philly_blockgroup.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1338 features and 18 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -8380161 ymin: 4846701 xmax: -8344037 ymax: 4886015
## Projected CRS: WGS 84 / Pseudo-Mercator
# philly bounds
philly_bounds <- st_union(blockgroups)

# philly hydrology (bounded by philly_bounds; source: https://opendataphilly.org/datasets/hydrology/)
hydro <- st_read("https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson") %>% 
    st_transform(crs = 'EPSG:2272') %>% 
  st_intersection(philly_bounds)
## Reading layer `OGRGeoJSON' from data source 
##   `https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 7970 features and 29 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -76.18462 ymin: 38.7667 xmax: -74.71871 ymax: 40.66593
## Geodetic CRS:  WGS 84
# highway
# stateroads_inphilly <- st_read("data/PaStateRoads2024_03.geojson") %>% 
#   st_transform('EPSG:2272') %>% 
#   st_intersection(philly_bounds)

# write out stateroads_inphilly as its own file since it's big
# st_write(stateroads_inphilly,
#          "data/PhillyStateRoads.shp")

# read in pre-filte#1a9988 stateroads data
stateroads_inphilly <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/PhillyStateRoads.shp")
## Reading layer `PhillyStateRoads' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/PhillyStateRoads.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2315 features and 105 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: 2660586 ymin: 207551.4 xmax: 2749290 ymax: 304306.6
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
philly_mainhighways <- stateroads_inphilly %>%
  filter(TRAF_RT_NO %in% c("I", "US"),
         ST_RT_NO %in% c("0001", "0095", "0076", "0676")) %>%
  dplyr::select(STREET_NAM, ST_RT_NO, TRAF_RT_NO, TRAF_RT__1)

# save and only use the highways of interest
# for now, interested in comparing highways that cut through neighborhoods vs those that don't

# ACS 


# philly property data (https://opendataphilly.org/datasets/philadelphia-properties-and-assessment-history/) <-- that was the original source, but now i'm using the same downloaded file that Luming and Sijia have been working from
philly_properties <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/opa_properties_public.geojson") %>% 
  st_transform(crs = 'EPSG:2272')
## Reading layer `opa_properties_public' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/opa_properties_public.geojson' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 584002 features and 78 fields (with 171 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.27439 ymin: 39.87513 xmax: -74.95819 ymax: 40.1377
## Geodetic CRS:  WGS 84
# philly park data (https://opendataphilly.org/datasets/ppr-properties/)
philly_parks <- st_read("https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson") %>%
  st_transform(crs = 'EPSG:2272') %>% 
  st_intersection(philly_bounds)
## Reading layer `PPR_Properties' from data source 
##   `https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 507 features and 25 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -75.28353 ymin: 39.87117 xmax: -74.95865 ymax: 40.13186
## Geodetic CRS:  WGS 84
# philly schools
school <-
  st_read("https://opendata.arcgis.com/datasets/d46a7e59e2c246c891fbee778759717e_0.geojson") %>%
  st_transform('EPSG:2272')
## Reading layer `Schools' from data source 
##   `https://opendata.arcgis.com/datasets/d46a7e59e2c246c891fbee778759717e_0.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 495 features and 15 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.2665 ymin: 39.90781 xmax: -74.97057 ymax: 40.12974
## Geodetic CRS:  WGS 84
# city facilities (https://opendataphilly.org/datasets/city-facilities-master-facilities-database/)
facilities <- st_read("https://opendata.arcgis.com/datasets/b3c133c3b15d4c96bcd4d5cc09f19f4e_0.geojson") %>%
  st_transform('EPSG:2272') %>% 
  filter(STATUS == "A") # remove inactive sites
## Reading layer `City_Facilities_pub' from data source 
##   `https://opendata.arcgis.com/datasets/b3c133c3b15d4c96bcd4d5cc09f19f4e_0.geojson' 
##   using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 3197 features and 32 fields (with 5 geometries empty)
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -75.28169 ymin: 39.86686 xmax: -74.96455 ymax: 40.13108
## Geodetic CRS:  WGS 84
# libraries (from facilities data)
libraries <- facilities %>% 
  filter(ASSET_GROUP1 == "A8")

# produce (LPSS, HPSS -- indicators for produce; low or high produce supply stores)
retail_produce <- st_read("https://opendata.arcgis.com/datasets/53b8a1c653a74c92b2de23a5d7bf04a0_0.geojson") %>%
  st_transform('EPSG:2272')
## Reading layer `370eb703-5a4f-4abb-8920-727cef31373b2020329-1-rcimn.5n39o' from data source `https://opendata.arcgis.com/datasets/53b8a1c653a74c92b2de23a5d7bf04a0_0.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 1336 features and 16 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -75.28031 ymin: 39.86747 xmax: -74.95575 ymax: 40.13793
## Geodetic CRS:  WGS 84
# checking out what Luming_property.csv looks like
luming_property <- read.csv("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Luming_property.csv")
# looking at which properties were saved in luming_property -- Luming and Sijia's properties files are limited to the case study area
# luming_properties <- philly_properties %>% 
#   filter(objectid %in% luming_property$objectid)

# checking out what property_sijia_eda.geojson looks like
sijia_eda <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/property_sijia_eda.geojson")
## Reading layer `property_sijia_eda' from data source 
##   `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/property_sijia_eda.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 2394 features and 98 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 2692333 ymin: 236562 xmax: 2697665 ymax: 240121.5
## Projected CRS: NAD83 / Pennsylvania South (ftUS)

Initial plots

# interstate highways in philly
# ggplot() +
#     geom_sf(data = philly_bounds) +
#     geom_sf(data = stateroads_inphilly %>% 
#               filter(TRAF_RT_NO == "I"))
# 
# # US highways in philly
# ggplot() +
#     geom_sf(data = philly_bounds) +
#     geom_sf(data = stateroads_inphilly %>% 
#               filter(TRAF_RT_NO == "US"))
# 
# # PA state highways in philly
# ggplot() +
#     geom_sf(data = philly_bounds) +
#     geom_sf(data = stateroads_inphilly %>% 
#               filter(TRAF_RT_NO == "PA"))
# 
# # all other roads in philly
# ggplot() +
#     geom_sf(data = philly_bounds) +
#     geom_sf(data = stateroads_inphilly %>% 
#               filter(!(TRAF_RT_NO %in% c("I", "US", "PA"))))

# I-95, US-1, I-676, and I-76
philly_highways_plot <- 
  ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = philly_mainhighways,
          aes(color = ST_RT_NO)) +
  # add custom colors
  scale_color_manual(values = c("0001" = "#1a9988",
                                "0076" = "#eb5600",
                                "0095" = "#eba075",
                                "0676" = "#7fe3d6"),
                     labels = c("US-1", "I-76", "I-95", "I-676")) +
  labs(title = "Case Study of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Highways") +
  theme_void()
philly_highways_plot

# ggsave("Output/HighwaysMap.png", plot = philly_highways_plot)

# looking at study area within philly bounds
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") + 
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = studyarea, fill = "coral")

# philly parks
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887") + 
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent")

# philly schools
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") + 
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = school, color = "brown4") # point data

# libraries
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") + 
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = libraries, color = "#eb5600") # point data

# transit / looking at what this category actually consists of....
# looks like there's point geography for parking lots, some random transit stops, and heli pad station
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") + 
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = facilities %>% 
            filter(ASSET_GROUP1 == "A17",
                   grepl("Parking", ASSET_SUBT1_DESC)), color = "pink") # point data

ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") + 
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = sijia_eda, color = "#7fe3d6")

# prison facilities
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887") + 
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = facilities %>% 
            filter(ASSET_GROUP2 == "A13.3"), color = "lightgrey") # point data

# all philly properties
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887") + 
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = philly_properties, color = "black")

Case Study of Philadelphia’s Highways and Housing Price

For all Philly properties, calculate the distance from highway and see how it relates to price.

# taking Luming's distance calculator function
calculate_nearest_distance <- function(set_points, other_layer) {
  nearest_idx <- st_nearest_feature(set_points, other_layer)
  st_distance(set_points, other_layer[nearest_idx, ], by_element = TRUE) %>% as.numeric()
}

The following code chunk took too much to load (too many properties to consider), so I’m going to limit it to the properties within 500m (and possibly look at 1000m and 1500m) from highways to look at the immediate effects of highways on property prices.

# only keep properties that are within 500m (1640.42 ft) of highways of interest
buff500 <- 500 * 3.28084
highways_500buffer <- st_union(st_buffer(philly_mainhighways, buff500))

properties500 <- philly_properties[highways_500buffer,]

# only keep properties that are within 1000m (3280.84 ft) of highways of interest
buff1000 <- 1000 * 3.28084
highways_1000buffer <- st_union(st_buffer(philly_mainhighways, buff1000))

properties1000 <- philly_properties[highways_1000buffer,]

# only keep properties that are within 1500m (4921.26 ft) of highways of interest
buff1500 <- 1500 * 3.28084
highways_1500buffer <- st_union(st_buffer(philly_mainhighways, buff1500))

properties1500 <- philly_properties[highways_1500buffer,]

Properties within 500m of highways

# visualizing buffer
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = philly_mainhighways,
          aes(color = ST_RT_NO)) +
  
  # add custom colors
  scale_color_manual(values = c("0001" = "#1a9988",
                                "0076" = "#eb5600",
                                "0095" = "#eba075",
                                "0676" = "#7fe3d6")) +
  
  labs(title = "Case Study of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Highways",
       fill = "Highways") +
  theme_void()

# visualizing properties to make sure we got the right ones
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties500,
          color = "#4a4a4a", size = .3) +
  geom_sf(data = philly_mainhighways,
          aes(color = ST_RT_NO)) +
  
  # add custom colors
  scale_color_manual(values = c("0001" = "#1a9988",
                                "0076" = "#eb5600",
                                "0095" = "#eba075",
                                "0676" = "#7fe3d6")) +
  
  labs(title = "Properties within 500m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Highways",
       fill = "Highways") +
  theme_void()

ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_1000buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties1000,
          color = "#4a4a4a", size = .3) +
  geom_sf(data = philly_mainhighways,
          aes(color = ST_RT_NO)) +
  
  # add custom colors
  scale_color_manual(values = c("0001" = "#1a9988",
                                "0076" = "#eb5600",
                                "0095" = "#eba075",
                                "0676" = "#7fe3d6"),
                       labels = c("US-1", "I-76", "I-95", "I-676")) +
  
  labs(title = "Properties within 1000m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Highways",
       fill = "Highways") +
  theme_void()

ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_1500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties1500,
          color = "#4a4a4a", size = .3) +
  geom_sf(data = philly_mainhighways,
          aes(color = ST_RT_NO)) +
  
  # add custom colors
  scale_color_manual(values = c("0001" = "#1a9988",
                                "0076" = "#eb5600",
                                "0095" = "#eba075",
                                "0676" = "#7fe3d6")) +
  
  labs(title = "Properties within 1500m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Highways",
       fill = "Highways") +
  theme_void()

Are there differences between how price of property relates to dist_to_closest_highway depending on type of highway (whether highway cuts through neighborhood or not)?

# starting with 500m buffer
# need to check if there are 0 values in sale_price before log transforming
summary(properties500$sale_price)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
##         0        10     89900    327917    230000 342000000       840
# Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
#   0        10     89900    327917    230000 342000000       840 

# only keep properties whose sale_price >= $10,000
# according to #1a9988fin, the most expensive house in philadelphia right now is $25,000,000 -- using that as a cutoff
properties500_clean <- properties500 %>% 
  filter(sale_price >= 10000,
         sale_price <= 25000000,
         total_area >= 100,
         total_livable_area >= 100,
         sale_date >= as.Date("2005-01-01", format = "%Y-%m-%d"),
         sale_date <= Sys.Date()) %>% 
  mutate(price_perTLA = sale_price / total_livable_area,
         price_perTA = sale_price / total_area,
         log_price = log(sale_price),
         log_price_perTLA = log(price_perTLA),
         log_price_perTA = log(price_perTA),
         norm_log_price = scale(log_price),
         norm_log_price_perTLA = scale(log_price_perTLA),
         norm_log_price_perTA = scale(log_price_perTA),
         category_easy = case_when(category_code_description %in% c("APARTMENTS  > 4 UNITS",
                                                                    "MIXED USE",
                                                                    "MULTI FAMILY",
                                                                    "SINGLE FAMILY") ~ "Residential or Mixed",
                                   grepl("INDUS", category_code_description) ~ "Industrial",
                                   grepl("HOTEL", category_code_description) ~ "Hotel",
                                   grepl("OFFICES", category_code_description) ~ "Offices",
                                   category_code_description %in% c("COMMERCIAL",
                                                                    "GARAGE - COMMERCIAL",
                                                                    "RETAIL") ~ "Commercial",
                                   grepl("VACANT", category_code_description) ~ "Vacant",
                                   .default = "Other"))

summary(properties500_clean$sale_price)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    10000   103000   174900   340263   295000 24750000
#  Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
# 10000   103000   174900   340263   295000 24750000
summary(properties500_clean$price_perTLA)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     0.77    78.86   130.36   181.26   208.33 11527.50
# Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
# 0.77    78.86   130.36   181.26   208.33 11527.5
summary(properties500_clean$price_perTA)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     0.23    58.35    99.21   218.95   220.79 22671.57
# Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
# 0.23    58.35    99.21   218.95   220.79 22671.57

# correlations between total_area and total_livable_area, and sale_price
# cor(properties500_clean$total_area, properties500_clean$total_livable_area, use = "pairwise.complete.obs") # 0.5443428
# cor(properties500_clean$total_area, properties500_clean$sale_price, use = "pairwise.complete.obs") # 0.4210804
# cor(properties500_clean$total_livable_area, properties500_clean$sale_price, use = "pairwise.complete.obs") # 0.577406

# histogram of sale_price
ggplot(data = properties500_clean) +
  geom_histogram(aes(x = sale_price),
                 bins = 100)

ggplot(data = properties500_clean) +
  geom_histogram(aes(x = price_perTA),
                 bins = 100)

ggplot(data = properties500_clean) +
  geom_histogram(aes(x = price_perTLA),
                 bins = 100)

ggplot(data = properties500_clean) +
  geom_histogram(aes(x = log_price),
                 bins = 100)

ggplot(data = properties500_clean) +
  geom_histogram(aes(x = log_price_perTA),
                 bins = 100)

ggplot(data = properties500_clean) +
  geom_histogram(aes(x = log_price_perTLA),
                 bins = 100)

# map of properties within 500 m of highway, colored by sale_price
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
#   geom_sf(data = highways_500buffer,
#           color = "transparent", fill = "grey", alpha = 0.4) +
#   geom_sf(data = properties500_clean,
#           aes(color = sale_price), size = .3) +
#   geom_sf(data = philly_mainhighways,
#           color = "black") +
#   labs(title = "Sale Price of Properties within 500m of Highways in Philly",
#        subtitle = "US-1, I-76, I-95, and I-676") +
#   theme_void()

# map of properties within 500 m of highway, colored by price_perTA
# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
#   geom_sf(data = highways_500buffer,
#           color = "transparent", fill = "grey", alpha = 0.4) +
#   geom_sf(data = properties500_clean,
#           aes(color = price_perTA), size = .3) +
#   geom_sf(data = philly_mainhighways,
#           color = "black") +
#   labs(title = "Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
#        subtitle = "US-1, I-76, I-95, and I-676") +
#   theme_void()

# ggplot() +
#   geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
#   geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
#   geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
#   geom_sf(data = highways_500buffer,
#           color = "transparent", fill = "grey", alpha = 0.4) +
#   geom_sf(data = properties500_clean,
#           aes(color = price_perTLA), size = .3) +
#   geom_sf(data = philly_mainhighways,
#           color = "black") +
#   labs(title = "Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
#        subtitle = "US-1, I-76, I-95, and I-676") +
#   theme_void()

ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "black", alpha = 0.2) +
  geom_sf(data = properties500_clean,
          aes(color = log_price), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price of Properties within 500m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Log Price") +
  theme_void()

# house sale price (category_easy == "Residential or Mixed")
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "black", alpha = 0.2) +
  geom_sf(data = properties500_clean %>% 
            filter(grepl("Resident", category_easy)),
          aes(color = log_price), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price of Properties within 500m of Highways in Philly",
       subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
       color = "Log Price") +
  theme_void()

ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties500_clean,
          aes(color = log_price_perTA), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Log Price (per TA)") +
  theme_void()

# same thing but residential or mixed use only
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties500_clean %>% 
            filter(grepl("Resident", category_easy)),
          aes(color = log_price_perTA), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
       subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
       color = "Log Price (per TA)") +
  theme_void()

ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties500_clean,
          aes(color = log_price_perTLA), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Log Price (per TLA)") +
  theme_void()

# same thing but residential/mixed use only
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = properties500_clean %>% 
            filter(grepl("Resident", category_easy)),
          aes(color = log_price_perTLA), size = .3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  labs(title = "Log Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
       subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
       color = "Log Price (per TLA)") +
  theme_void()

Calculate each property’s distance to highways.

# calculate each property's distance to highways of interest
# distance is in feet
# start_time <- Sys.time()
prop500 <- properties500_clean %>% 
  select(matches("assessment_date|building_code|category_|census_tract|geographic|zip|market_value|parcel|sale_|taxable|_area|year_built|objectid|geometry|price")) %>% 
  mutate(dist_to_US001 = calculate_nearest_distance(geometry,
                                     philly_mainhighways %>%
                                       filter(ST_RT_NO == "0001")),
         dist_to_I076 = calculate_nearest_distance(geometry,
                                    philly_mainhighways %>%
                                      filter(ST_RT_NO == "0076")),
         dist_to_I095 = calculate_nearest_distance(geometry,
                                    philly_mainhighways %>%
                                      filter(ST_RT_NO == "0095")),
         dist_to_I676 = calculate_nearest_distance(geometry,
                                    philly_mainhighways %>%
                                      filter(ST_RT_NO == "0676")),
         dist_to_US001m = dist_to_US001 * 0.3048,
         dist_to_I076m = dist_to_I076 * 0.3048,
         dist_to_I095m = dist_to_I095 * 0.3048,
         dist_to_I676m = dist_to_I676 * 0.3048) # Time difference of 55.20697 secs

# find closest highway
prop500_closest <- prop500 %>%
  select(matches("objectid|dist_to")) %>%
  pivot_longer(cols = 2:5,
               names_to = "highway",
               values_to = "distance") %>%
  group_by(objectid) %>%
  summarise(dist_to_closest_highway = min(distance, na.rm = T)) %>%
  ungroup()  %>% 
  mutate(dist_to_closest_highwaym = dist_to_closest_highway * 0.3048) 

# join closest highway
prop500 <- left_join(prop500,
                     prop500_closest %>% st_drop_geometry(),
                     by = "objectid") %>%
  mutate(closest_highway = case_when(
    dist_to_closest_highway == dist_to_US001 ~ "US-1",
    dist_to_closest_highway == dist_to_I076 ~ "I-76",
    dist_to_closest_highway == dist_to_I095 ~ "I-95",
    dist_to_closest_highway == dist_to_I676 ~ "I-676",
    .default = NA),
    closest_highway = factor(closest_highway, levels = c("US-1", "I-76", "I-676", "I-95")),
    highway_type = case_when(closest_highway %in% c("US-1","I-676") ~ "through neighborhood",
                             closest_highway %in% c("I-95","I-76") ~ "along waterway",
                             .default = NA))

# save file
# st_write(prop500, "/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_prop500_dist2highway.geojson")

# read in pre-saved file
# prop500_new <- st_read("/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/data/philly_prop500_dist2highway.geojson")

First, let’s looks at a faceted plot of the distribution of sales price for each highway separately

# prop500_faceted <- 
#   ggplot(data = prop500,
#          aes(x = dist_to_closest_highwaym, y = log_price_perTA)) +
#   geom_point(color = "#1a9988", alpha = .1, size = 0.3) +
#   geom_smooth(color = "#eb5600", method = "lm", se = F, linewidth = 1.2) +
#   # geom_point(alpha = .1, size = 0.3) +
#   # geom_smooth(method = "lm", se = F, linewidth = 1.2) +
#   facet_wrap(~closest_highway) +
#   labs(title = "Distribution of Property Sale Price",
#        x = "Distance to Closest Highway (m)",
#        y = "Log Sale Price (per TA)") +
#   theme_minimal()

prop500_faceted <- 
  ggplot(data = prop500,
         aes(x = log_price_perTA, color = closest_highway, fill = closest_highway)) +
  geom_histogram(bins = 50) +
  facet_wrap(~closest_highway, scales = "free_y") +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Distribution of Property Sale Price",
       x = "Log Sale Price (per Total Area)",
       y = "Count",
       color = "Closest Highway",
       fill = "Closest Highway") +
  theme_minimal()

prop500_faceted

# save image for slides
# ggsave("Output/SalePriceDistribution.png", plot = prop500_faceted)

# residential + mixed only 
# prop500_faceted_res <- 
#   ggplot(data = prop500 %>% 
#            filter(grepl("Reside", category_easy)),
#          aes(x = dist_to_closest_highwaym, y = log_price_perTA)) +
#   geom_point(color = "#1a9988", alpha = .1, size = 0.3) +
#   geom_smooth(color = "#eb5600", method = "lm", se = F, linewidth = 1.2) +
#   # geom_point(alpha = .1, size = 0.3) +
#   # geom_smooth(method = "lm", se = F, linewidth = 1.2) +
#   # facet_wrap(closest_highway) +
#   labs(title = "Distribution of Property Sale Price",
#        x = "Distance to Closest Highway (m)",
#        y = "Log Sale Price (per TA)") +
#   theme_minimal()

prop500_faceted_res <- 
  ggplot(data = prop500 %>% 
           filter(grepl("Resid", category_easy)),
         aes(x = log_price_perTA, color = closest_highway, fill = closest_highway)) +
  geom_histogram(bins = 50) +
  facet_wrap(~closest_highway, scales = "free_y") +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Distribution of Property Sale Price",
       subtitle = "Residential and Mixed Properties only",
       x = "Log Sale Price (per Total Area)",
       y = "Count",
       color = "Closest Highway",
       fill = "Closest Highway") +
  theme_minimal()

prop500_faceted_res

# save image for slides
# ggsave("Output/SalePriceDistribution_residential.png", plot = prop500_faceted_res)

Now, look at relationship between distance to highway and price.

# map of properties within 500m of highway cored by which highway they're closest to
ggplot() +
  geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
  geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
  geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
  geom_sf(data = highways_500buffer,
          color = "transparent", fill = "grey", alpha = 0.4) +
  geom_sf(data = prop500,
          aes(color = closest_highway), size = 0.3) +
  geom_sf(data = philly_mainhighways,
          color = "black") +
  
  # add custom colors
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  
  labs(title = "Properties within 500m of Highways in Philly",
       subtitle = "US-1, I-76, I-95, and I-676",
       color = "Closest Highway") +
  theme_void()

# simple scatter of price and dist2closesthighway
ggplot(data = prop500, 
       aes(x = dist_to_closest_highwaym, y = sale_price, 
           color = closest_highway, fill = closest_highway)) +
  geom_point(alpha = .2) +
  geom_smooth(method = "lm") +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Property Price as a function of Distance to Closest Highway",
       subtitle = "Colored by Closest Highway; Properties within 500m of highways",
       fill = "Closest Highway",
       color = "Closest Highway",
       x = "Distance to Closest Highway (m)",
       y = "Property Sale Price ($)") +
  theme_minimal()

# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
scatter_4highways <- 
  ggplot(data = prop500, 
       aes(x = dist_to_closest_highwaym, y = log_price_perTA, 
           color = closest_highway, fill = closest_highway)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm", size = 1.2) +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway",
       subtitle = "Colored by Closest Highway; Properties within 500m of highways",
       fill = "Closest Highway",
       color = "Closest Highway",
       x = "Distance to Closest Highway (m)",
       y = "Property Sale Price (log price per TA)") +
  theme_minimal()

scatter_4highways

# save image for slides
# ggsave("Output/PriceVSDist2Highway_4highways.png",
#        plot = scatter_4highways,
#        height = 7,
#        width = 10)

# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway 
# sale_price <= 1e7
ggplot(data = prop500 %>% 
         filter(sale_price <= 1e7), 
       aes(x = dist_to_closest_highwaym, y = log_price_perTA, 
           color = closest_highway, fill = closest_highway)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm", size = 1.2) +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway",
       subtitle = "Colored by Closest Highway; Properties within 500m of highways",
       fill = "Closest Highway",
       color = "Closest Highway",
       x = "Distance to Closest Highway (m)",
       y = "Property Sale Price (log price per TA)") +
  theme_minimal()

# simple scatter of price (as log price per total area in sq ft) and log(dist2closesthighway)
ggplot(data = prop500, 
       aes(x = log(dist_to_closest_highwaym), y = log_price_perTA, 
           color = closest_highway, fill = closest_highway)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm", size = 1.2) +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway (log)",
       subtitle = "Colored by Closest Highway; Properties within 500m of highways",
       fill = "Closest Highway",
       color = "Closest Highway",
       x = "Log Distance to Closest Highway (m)",
       y = "Log Property Sale Price (per TA)") +
  theme_minimal()

# simple regression of price (log_price_perTA) and dist2closesthighway
fit1 <- lm(log_price_perTA ~ dist_to_closest_highwaym, data = prop500)

summary(fit1)
## 
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym, data = prop500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.1688 -0.6772 -0.1453  0.6566  5.2436 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               4.804e+00  1.298e-02  370.09  < 2e-16 ***
## dist_to_closest_highwaym -2.111e-04  4.172e-05   -5.06 4.21e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.069 on 37293 degrees of freedom
## Multiple R-squared:  0.0006861,  Adjusted R-squared:  0.0006593 
## F-statistic: 25.61 on 1 and 37293 DF,  p-value: 4.209e-07
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (4 types)
fit2 <- lm(log_price_perTA ~ dist_to_closest_highwaym + closest_highway, data = prop500)

summary(fit2)
## 
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + closest_highway, 
##     data = prop500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.6571 -0.5245  0.0563  0.5566  4.6625 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               4.312e+00  1.222e-02 352.831  < 2e-16 ***
## dist_to_closest_highwaym -2.741e-04  3.662e-05  -7.483 7.41e-14 ***
## closest_highwayI-76       9.473e-01  1.885e-02  50.255  < 2e-16 ***
## closest_highwayI-676      1.848e+00  3.899e-02  47.390  < 2e-16 ***
## closest_highwayI-95       1.011e+00  1.019e-02  99.169  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9317 on 37290 degrees of freedom
## Multiple R-squared:  0.2412, Adjusted R-squared:  0.2411 
## F-statistic:  2964 on 4 and 37290 DF,  p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (4 types) with interaction
fit3 <- lm(log_price_perTA ~ dist_to_closest_highwaym + closest_highway + dist_to_closest_highwaym*closest_highway, data = prop500)

summary(fit3)
## 
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + closest_highway + 
##     dist_to_closest_highwaym * closest_highway, data = prop500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.4704 -0.5124  0.0547  0.5539  4.6450 
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                    4.186e+00  1.573e-02 266.166
## dist_to_closest_highwaym                       1.813e-04  5.120e-05   3.542
## closest_highwayI-76                            3.368e-01  5.506e-02   6.116
## closest_highwayI-676                           1.986e+00  8.292e-02  23.950
## closest_highwayI-95                            1.374e+00  2.338e-02  58.773
## dist_to_closest_highwaym:closest_highwayI-76   1.752e-03  1.578e-04  11.100
## dist_to_closest_highwaym:closest_highwayI-676 -5.065e-04  3.115e-04  -1.626
## dist_to_closest_highwaym:closest_highwayI-95  -1.308e-03  7.586e-05 -17.243
##                                               Pr(>|t|)    
## (Intercept)                                    < 2e-16 ***
## dist_to_closest_highwaym                      0.000398 ***
## closest_highwayI-76                           9.69e-10 ***
## closest_highwayI-676                           < 2e-16 ***
## closest_highwayI-95                            < 2e-16 ***
## dist_to_closest_highwaym:closest_highwayI-76   < 2e-16 ***
## dist_to_closest_highwaym:closest_highwayI-676 0.103936    
## dist_to_closest_highwaym:closest_highwayI-95   < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9252 on 37287 degrees of freedom
## Multiple R-squared:  0.2519, Adjusted R-squared:  0.2517 
## F-statistic:  1793 on 7 and 37287 DF,  p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types)
fit4 <- lm(log_price_perTA ~ dist_to_closest_highwaym + highway_type, data = prop500)

summary(fit4)
## 
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + highway_type, 
##     data = prop500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.6296 -0.5491  0.0288  0.5530  5.6683 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       5.337e+00  1.293e-02 412.760   <2e-16 ***
## dist_to_closest_highwaym         -3.567e-04  3.747e-05  -9.521   <2e-16 ***
## highway_typethrough neighborhood -9.454e-01  9.953e-03 -94.977   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9595 on 37292 degrees of freedom
## Multiple R-squared:  0.1953, Adjusted R-squared:  0.1953 
## F-statistic:  4526 on 2 and 37292 DF,  p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types) with interaction
fit5 <- lm(log_price_perTA ~ dist_to_closest_highwaym + highway_type + dist_to_closest_highwaym*highway_type, data = prop500)

summary(fit5)
## 
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + highway_type + 
##     dist_to_closest_highwaym * highway_type, data = prop500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5438 -0.5400  0.0276  0.5516  5.7417 
## 
## Coefficients:
##                                                             Estimate Std. Error
## (Intercept)                                                5.454e+00  1.698e-02
## dist_to_closest_highwaym                                  -7.651e-04  5.363e-05
## highway_typethrough neighborhood                          -1.169e+00  2.331e-02
## dist_to_closest_highwaym:highway_typethrough neighborhood  7.954e-04  7.485e-05
##                                                           t value Pr(>|t|)    
## (Intercept)                                                321.19   <2e-16 ***
## dist_to_closest_highwaym                                   -14.27   <2e-16 ***
## highway_typethrough neighborhood                           -50.16   <2e-16 ***
## dist_to_closest_highwaym:highway_typethrough neighborhood   10.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.958 on 37291 degrees of freedom
## Multiple R-squared:  0.1978, Adjusted R-squared:  0.1977 
## F-statistic:  3064 on 3 and 37291 DF,  p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types) with interaction
fit6 <- lm(log_price_perTA ~ log(dist_to_closest_highwaym) + highway_type + log(dist_to_closest_highwaym)*highway_type, data = prop500)

summary(fit6)
## 
## Call:
## lm(formula = log_price_perTA ~ log(dist_to_closest_highwaym) + 
##     highway_type + log(dist_to_closest_highwaym) * highway_type, 
##     data = prop500)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5984 -0.5404  0.0279  0.5509  5.7509 
## 
## Coefficients:
##                                                                Estimate
## (Intercept)                                                     6.05426
## log(dist_to_closest_highwaym)                                  -0.14914
## highway_typethrough neighborhood                               -1.84302
## log(dist_to_closest_highwaym):highway_typethrough neighborhood  0.16410
##                                                                Std. Error
## (Intercept)                                                       0.06130
## log(dist_to_closest_highwaym)                                     0.01108
## highway_typethrough neighborhood                                  0.08546
## log(dist_to_closest_highwaym):highway_typethrough neighborhood    0.01549
##                                                                t value Pr(>|t|)
## (Intercept)                                                      98.76   <2e-16
## log(dist_to_closest_highwaym)                                   -13.47   <2e-16
## highway_typethrough neighborhood                                -21.57   <2e-16
## log(dist_to_closest_highwaym):highway_typethrough neighborhood   10.59   <2e-16
##                                                                   
## (Intercept)                                                    ***
## log(dist_to_closest_highwaym)                                  ***
## highway_typethrough neighborhood                               ***
## log(dist_to_closest_highwaym):highway_typethrough neighborhood ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9583 on 37291 degrees of freedom
## Multiple R-squared:  0.1973, Adjusted R-squared:  0.1973 
## F-statistic:  3056 on 3 and 37291 DF,  p-value: < 2.2e-16

Visualizing the highway types (cutting through neighborhoods vs running alongside water).

# simple scatter of price and dist2closesthighway
ggplot(data = prop500, 
       aes(x = dist_to_closest_highwaym, y = sale_price, 
           color = highway_type, fill = highway_type)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm") +
  scale_color_manual(values = c("through neighborhood" = "#1a9988",
                                "along waterway" = "#eb5600")) +
  scale_fill_manual(values = c("through neighborhood" = "#1a9988",
                               "along waterway" = "#eb5600")) +
  labs(title = "Property Price as a function of Distance to Highway Type",
       subtitle = "Colored by Highway Type; Properties within 500m of highways",
       fill = "Highway Type",
       color = "Highway Type") +
  theme_minimal()

# simple scatter of price and dist2closesthighway with loess method
ggplot(data = prop500, 
       aes(x = dist_to_closest_highwaym, y = sale_price, 
           color = highway_type, fill = highway_type)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "loess", se = FALSE, size = 1.2) +
  scale_color_manual(values = c("through neighborhood" = "#1a9988",
                                "along waterway" = "#eb5600")) +
  scale_fill_manual(values = c("through neighborhood" = "#1a9988",
                               "along waterway" = "#eb5600")) +
  labs(title = "Property Price as a function of Distance to Highway Type",
       subtitle = "Colored by Highway Type; Properties within 500m of highways",
       fill = "Highway Type",
       color = "Highway Type") +
  theme_minimal()

# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
scatter_2types <- 
  ggplot(data = prop500, 
       aes(x = dist_to_closest_highwaym, y = log_price_perTA, 
           color = highway_type, fill = highway_type)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm", size = 1.2) +
  scale_color_manual(values = c("through neighborhood" = "#1a9988",
                                "along waterway" = "#eb5600")) +
  scale_fill_manual(values = c("through neighborhood" = "#1a9988",
                               "along waterway" = "#eb5600")) +
  labs(title = "Property Price (log price per TA) as a function of Distance to Highway Type",
       subtitle = "Colored by Highway Type; Properties within 500m of highways",
       fill = "Highway Type",
       color = "Highway Type",
       x = "Distance to Closest Highway (m)",
       y = "Log Sale Price (per TA)") +
  theme_minimal()
scatter_2types

# save for slides
# ggsave("Output/PriceVSDist2Highway_2types.png", 
#        plot = scatter_2types,
#        height = 7,
#        width = 10)

# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
ggplot(data = prop500, 
       aes(x = log(dist_to_closest_highwaym), y = log_price_perTA, 
           color = highway_type, fill = highway_type)) +
  geom_point(alpha = .2, size = 0.3) +
  geom_smooth(method = "lm", size = 1.2) +
  scale_color_manual(values = c("through neighborhood" = "#1a9988",
                                "along waterway" = "#eb5600")) +
  scale_fill_manual(values = c("through neighborhood" = "#1a9988",
                               "along waterway" = "#eb5600")) +
  labs(title = "Property Price (log price per TA) as a function of Distance to Highway Type",
       subtitle = "Colored by Highway Type; Properties within 500m of highways",
       fill = "Highway Type",
       color = "Highway Type",
       x = "Distance to Closest Highway (m)",
       y = "Log Sale Price (per TA)") +
  theme_minimal()

# correlations
prop500 %>%
  st_drop_geometry() %>%
  group_by(highway_type) %>%
  summarize(correlation = cor(dist_to_closest_highwaym, log_price_perTA, method = "pearson"), .groups = "drop") %>% 
  mutate(correlation = round(correlation, 3)) %>% 
  kbl(col.names = c("Highway Type", "Correlation"),
        align = c("l", "c"),
      caption = "Correlation Between Property Sale Price and Distance to Closest Highway by Highway Types") %>% 
    kable_styling() %>% 
    kable_classic(full_width = F, html_font = "Cambria") 
Correlation Between Property Sale Price and Distance to Closest Highway by Highway Types
Highway Type Correlation
along waterway -0.091
through neighborhood 0.005
# correlation of price and dist to highway for each highway
cor <- prop500 %>%
  st_drop_geometry() %>%
  group_by(closest_highway) %>%
  summarize(correlation = cor(dist_to_closest_highwaym, log_price_perTA, method = "pearson"), .groups = "drop")
cor
## # A tibble: 4 × 2
##   closest_highway correlation
##   <fct>                 <dbl>
## 1 US-1                 0.0338
## 2 I-76                 0.181 
## 3 I-676               -0.0420
## 4 I-95                -0.139
prop500 %>%
  st_drop_geometry() %>%
  group_by(closest_highway) %>%
  summarize(correlation = cor(log(dist_to_closest_highwaym), log_price_perTA, method = "pearson"), .groups = "drop")
## # A tibble: 4 × 2
##   closest_highway correlation
##   <fct>                 <dbl>
## 1 US-1                0.0363 
## 2 I-76                0.211  
## 3 I-676               0.00866
## 4 I-95               -0.133

Limiting the properties further to those under $1mil

prop500_under1m <- prop500 %>%
  filter(sale_price < 1e6)
ggplot(data = prop500_under1m, 
       aes(x = dist_to_closest_highwaym, y = sale_price, 
           color = closest_highway, fill = closest_highway)) +
  geom_point(alpha = .1, size = 0.3) +
  geom_smooth(method = "loess", se = FALSE, size = 1.2) +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Property Price (under $1M) as a function of Distance to Closest Highway",
       subtitle = "Colored by Closest Highway; Properties within 500m of highways",
       fill = "Closest Highway",
       color = "Closest Highway") +
  theme_minimal()

# correlation of price and dist to highway for each highway
cor <- prop500_under1m %>%
  st_drop_geometry() %>%
  group_by(closest_highway) %>%
  summarize(correlation = cor(dist_to_closest_highway, log_price_perTA, method = "pearson"), .groups = "drop")
cor
## # A tibble: 4 × 2
##   closest_highway correlation
##   <fct>                 <dbl>
## 1 US-1                 0.0494
## 2 I-76                 0.157 
## 3 I-676                0.0490
## 4 I-95                -0.146
# correlation of price and dist to closest highway
prop500_under1m %>%
  st_drop_geometry() %>%
  summarize(correlation = cor(dist_to_closest_highway, log_price_perTA, method = "pearson"), .groups = "drop")
##   correlation
## 1 -0.02534909
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
ggplot(data = prop500_under1m, 
       aes(x = dist_to_closest_highwaym, y = log_price_perTA, 
           color = closest_highway, fill = closest_highway)) +
  geom_point(alpha = .1, size = 0.3) +
  geom_smooth(method = "lm", se=F, size = 1.2) +
  scale_color_manual(values = c("US-1" = "#1a9988",
                                "I-76" = "#eb5600",
                                "I-95" = "#eba075",
                                "I-676" = "#7fe3d6")) +
  scale_fill_manual(values = c("US-1" = "#1a9988",
                               "I-76" = "#eb5600",
                               "I-95" = "#eba075",
                               "I-676" = "#7fe3d6")) +
  labs(title = "Property Price (log price per TA; under $1M) as a function of Distance to Closest Highway",
       subtitle = "Colored by Closest Highway; Properties within 500m of highways",
       fill = "Closest Highway",
       color = "Closest Highway") +
  theme_minimal()

Properties within 1000m of Highways

Relationship between Housing Price and other variables

Notably, I’m exploring the relationship between housing price and distance to highways, distance to parks, distance to commercial areas, distance to schools.

For now, I’m using the study area-related data that Luming and Sijia compiled